@article{li2019jasper,
  title={{Jasper}: An End-to-End Convolutional Neural Acoustic Model},
  author={Li, Jason and Lavrukhin, Vitaly and Ginsburg, Boris and Leary, Ryan and Kuchaiev, Oleksii and Cohen, Jonathan M. and Nguyen, Huyen and Gadde, Ravi Teja},
  journal={arXiv preprint arXiv:1904.03288},
  year={2019}
}


@inproceedings{panayotov2015librispeech,
  title={{LibriSpeech}: an {ASR} corpus based on public domain audio books},
  author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},
  booktitle={2015 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages={5206--5210},
  year={2015},
  organization={IEEE}
}

@misc{huggingface2019transformers,
  title={A library of state-of-the-art pretrained models for Natural Language Processing ({NLP})},
  author={{Hugging Face}},
  howpublished={\url{https://github.com/huggingface/pytorch-transformers}},
  year={2019},
  note={Accessed August 23, 2019}
}

@article{ginsburg2019stochastic,
  author  = {Ginsburg, Boris and Castonguay, Patrice and Hrinchuk, Oleksii and Kuchaiev, Oleksii and Lavrukhin, Vitaly and Leary, Ryan and Li, Jason and Nguyen, Huyen and Cohen, Jonathan M},
  title   = {Stochastic Gradient Methods with Layer-wise Adaptive Moments for Training of Deep Networks},
  journal = {arXiv preprint arXiv:1905.11286},
  year    = {2019},
}

@misc{lee2019biobert,
  title={{BioBERT}: a pre-trained biomedical language representation model for biomedical text mining},
  author={Lee, Jinhyuk and Yoon, Wonjin and Kim, Sungdong and Kim, Donghyeon and Kim, Sunkyu and So, Chan Ho and Kang, Jaewoo},
  year={2019},
  eprint={1901.08746},
  archivePrefix={arXiv},
  primaryClass={cs.CL}
}

@misc{beltagy2019scibert,
  title={{SciBERT}: Pretrained Contextualized Embeddings for Scientific Text},
  author={Beltagy, Iz and Cohan, Arman and Lo, Kyle},
  year={2019},
  eprint={1903.10676},
  archivePrefix={arXiv},
  primaryClass={cs.CL}
}

@article{baevski2018adaptive,
  author  = {Baevski, Alexei and Auli, Michael},
  title   = {Adaptive input representations for neural language modeling},
  journal = {arXiv preprint arXiv:1809.10853},
  year    = {2018},
}

@article{merity2016pointer,
  author  = {Merity, Stephen and Xiong, Caiming and Bradbury, James and Socher, Richard},
  title   = {Pointer sentinel mixture models},
  journal = {arXiv preprint arXiv:1609.07843},
  year    = {2016},
}

@article{devlin2018bert,
  title={{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  journal={arXiv preprint arXiv:1810.04805},
  year={2018}
}

@inproceedings{vaswani2017attention,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention is all you need},
  booktitle = {Advances in neural information processing systems},
  pages     = {5998--6008},
  year      = {2017},
}


@article{ott2018scaling,
  author  = {Ott, Myle and Edunov, Sergey and Grangier, David and Auli, Michael},
  title   = {Scaling neural machine translation},
  journal = {arXiv preprint arXiv:1806.00187},
  year    = {2018},
}


@article{sennrich2015neural,
  author  = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title   = {Neural machine translation of rare words with subword units},
  journal = {arXiv preprint arXiv:1508.07909},
  year    = {2015},
}


@article{press2016using,
  author  = {Press, Ofir and Wolf, Lior},
  title   = {Using the output embedding to improve language models},
  journal = {arXiv preprint arXiv:1608.05859},
  year    = {2016},
}


@article{post2018call,
  title={A call for clarity in reporting {BLEU} scores},
  author={Post, Matt},
  journal={arXiv preprint arXiv:1804.08771},
  year={2018}
}


@article{ott2018analyzing,
  author  = {Ott, Myle and Auli, Michael and Grangier, David and Ranzato, Marc'Aurelio},
  title   = {Analyzing uncertainty in neural machine translation},
  journal = {arXiv preprint arXiv:1803.00047},
  year    = {2018},
}

@article{radford2019language,
  author  = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title   = {Language models are unsupervised multitask learners},
  journal = {OpenAI Blog},
  volume  = {1},
  number  = {8},
  year    = {2019},
}

@article{chen2019bert,
  title={{BERT} for Joint Intent Classification and Slot Filling},
  author={Chen, Qian and Zhuo, Zhu and Wang, Wen},
  journal={arXiv preprint arXiv:1902.10909},
  year={2019}
}


@article{budzianowski2018multiwoz,
  title={{MultiWOZ} -- A Large-Scale Multi-Domain {Wizard-of-Oz} Dataset for Task-Oriented Dialogue Modelling},
  author={Budzianowski, Pawe{\l} and Wen, Tsung-Hsien and Tseng, Bo-Hsiang and Casanueva, Inigo and Ultes, Stefan and Ramadan, Osman and Ga{\v{s}}i{\'c}, Milica},
  journal={arXiv preprint arXiv:1810.00278},
  year={2018}
}

@article{eric2019multiwoz,
  title={{MultiWOZ} 2.1: Multi-domain dialogue state corrections and state tracking baselines},
  author={Eric, Mihail and Goel, Rahul and Paul, Shachi and Sethi, Abhishek and Agarwal, Sanchit and Gao, Shuyang and Hakkani-Tur, Dilek},
  journal={arXiv preprint arXiv:1907.01669},
  year={2019}
}


@article{wu2019transferable,
  author  = {Wu, Chien-Sheng and Madotto, Andrea and Hosseini-Asl, Ehsan and Xiong, Caiming and Socher, Richard and Fung, Pascale},
  title   = {Transferable multi-domain state generator for task-oriented dialogue systems},
  journal = {arXiv preprint arXiv:1905.08743},
  year    = {2019},
}


@inproceedings{henderson2015machine,
  title={Machine Learning for Dialog State Tracking: A Review},
  author={Henderson, Matthew},
  booktitle={Proceedings of the First International Workshop on Machine Learning in Spoken Language Processing},
  year={2015}
}
